{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '../t5/prepare/mesolitica-tpu.json'\n",
    "os.environ['CUDA_VISIBLE_DEVICES'] = '1'\n",
    "\n",
    "from google.cloud import storage\n",
    "client = storage.Client()\n",
    "bucket = client.bucket('mesolitica-tpu-general')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "os.system('mkdir out-large')\n",
    "\n",
    "blob = bucket.blob('albert-large/model.ckpt-475000.data-00000-of-00001')\n",
    "blob.download_to_filename('out-large/model.ckpt-475000.data-00000-of-00001')\n",
    "\n",
    "blob = bucket.blob('albert-large/model.ckpt-475000.index')\n",
    "blob.download_to_filename('out-large/model.ckpt-475000.index')\n",
    "\n",
    "blob = bucket.blob('albert-large/model.ckpt-475000.meta')\n",
    "blob.download_to_filename('out-large/model.ckpt-475000.meta')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING:tensorflow:From /home/husein/.local/lib/python3.6/site-packages/albert/lamb_optimizer.py:34: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.\n",
      "\n"
     ]
    }
   ],
   "source": [
    "from albert import modeling\n",
    "from albert import optimization\n",
    "from albert import tokenization\n",
    "import tensorflow as tf\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING:tensorflow:From /home/husein/.local/lib/python3.6/site-packages/albert/tokenization.py:240: The name tf.logging.info is deprecated. Please use tf.compat.v1.logging.info instead.\n",
      "\n",
      "INFO:tensorflow:loading sentence piece model\n"
     ]
    }
   ],
   "source": [
    "tokenizer = tokenization.FullTokenizer(\n",
    "      vocab_file='sp10m.cased.v10.vocab', do_lower_case=False,\n",
    "      spm_model_file='sp10m.cased.v10.model')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['▁Hu', 'se', 'in', '▁comel']"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tokenizer.tokenize('Husein comel')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING:tensorflow:From /home/husein/.local/lib/python3.6/site-packages/albert/modeling.py:116: The name tf.gfile.GFile is deprecated. Please use tf.io.gfile.GFile instead.\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<albert.modeling.AlbertConfig at 0x7f9209fddc18>"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "albert_config = modeling.AlbertConfig.from_json_file('LARGE_config.json')\n",
    "albert_config"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def gather_indexes(sequence_tensor, positions):\n",
    "    \"\"\"Gathers the vectors at the specific positions over a minibatch.\"\"\"\n",
    "    sequence_shape = modeling.get_shape_list(sequence_tensor, expected_rank=3)\n",
    "    batch_size = sequence_shape[0]\n",
    "    seq_length = sequence_shape[1]\n",
    "    width = sequence_shape[2]\n",
    "\n",
    "    flat_offsets = tf.reshape(\n",
    "      tf.range(0, batch_size, dtype=tf.int32) * seq_length, [-1, 1])\n",
    "    flat_positions = tf.reshape(positions + flat_offsets, [-1])\n",
    "    flat_sequence_tensor = tf.reshape(sequence_tensor,\n",
    "                                    [batch_size * seq_length, width])\n",
    "    output_tensor = tf.gather(flat_sequence_tensor, flat_positions)\n",
    "    return output_tensor\n",
    "\n",
    "class Model:\n",
    "    def __init__(\n",
    "        self,\n",
    "    ):\n",
    "        self.X = tf.placeholder(tf.int32, [None, None])\n",
    "        self.segment_ids = tf.placeholder(tf.int32, [None, None])\n",
    "        self.input_masks = tf.placeholder(tf.int32, [None, None])\n",
    "        \n",
    "        model = modeling.AlbertModel(\n",
    "            config=albert_config,\n",
    "            is_training=False,\n",
    "            input_ids=self.X,\n",
    "            input_mask=self.input_masks,\n",
    "            token_type_ids=self.segment_ids,\n",
    "            use_one_hot_embeddings=False)\n",
    "        \n",
    "        input_tensor = model.get_sequence_output()\n",
    "        output_weights = model.get_embedding_table()\n",
    "        \n",
    "        with tf.variable_scope(\"cls/predictions\"):\n",
    "            with tf.variable_scope(\"transform\"):\n",
    "                input_tensor = tf.layers.dense(\n",
    "                              input_tensor,\n",
    "                              units=albert_config.embedding_size,\n",
    "                              activation=modeling.get_activation(albert_config.hidden_act),\n",
    "                              kernel_initializer=modeling.create_initializer(\n",
    "                                  albert_config.initializer_range))\n",
    "                input_tensor = modeling.layer_norm(input_tensor)\n",
    "            \n",
    "            output_bias = tf.get_variable(\n",
    "                \"output_bias\",\n",
    "                shape=[albert_config.vocab_size],\n",
    "                initializer=tf.zeros_initializer())\n",
    "            logits = tf.matmul(input_tensor, output_weights, transpose_b=True)\n",
    "            logits = tf.nn.bias_add(logits, output_bias)\n",
    "            log_probs = tf.nn.log_softmax(logits, axis=-1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING:tensorflow:From /home/husein/.local/lib/python3.6/site-packages/albert/modeling.py:194: The name tf.variable_scope is deprecated. Please use tf.compat.v1.variable_scope instead.\n",
      "\n",
      "WARNING:tensorflow:From /home/husein/.local/lib/python3.6/site-packages/albert/modeling.py:507: The name tf.get_variable is deprecated. Please use tf.compat.v1.get_variable instead.\n",
      "\n",
      "WARNING:tensorflow:From /home/husein/.local/lib/python3.6/site-packages/albert/modeling.py:588: The name tf.assert_less_equal is deprecated. Please use tf.compat.v1.assert_less_equal instead.\n",
      "\n",
      "WARNING:tensorflow:From /home/husein/.local/lib/python3.6/site-packages/albert/modeling.py:1025: The name tf.AUTO_REUSE is deprecated. Please use tf.compat.v1.AUTO_REUSE instead.\n",
      "\n",
      "WARNING:tensorflow:From /home/husein/.local/lib/python3.6/site-packages/albert/modeling.py:253: dense (from tensorflow.python.layers.core) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Use keras.layers.Dense instead.\n",
      "WARNING:tensorflow:From /home/husein/.local/lib/python3.6/site-packages/tensorflow_core/python/layers/core.py:187: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Please use `layer.__call__` method instead.\n"
     ]
    }
   ],
   "source": [
    "tf.reset_default_graph()\n",
    "sess = tf.InteractiveSession()\n",
    "model = Model()\n",
    "\n",
    "sess.run(tf.global_variables_initializer())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Restoring parameters from out-large/model.ckpt-475000\n"
     ]
    }
   ],
   "source": [
    "var_lists = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope = 'bert')\n",
    "cls = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope = 'cls')\n",
    "saver = tf.train.Saver(var_list = var_lists + cls)\n",
    "saver.restore(sess, 'out-large/model.ckpt-475000')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'albert-large/model.ckpt'"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "saver = tf.train.Saver(tf.trainable_variables())\n",
    "saver.save(sess, 'albert-large/model.ckpt')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "out = 'albert-large-bahasa-standard-cased'\n",
    "os.makedirs(out, exist_ok=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import AlbertTokenizer, AlbertModel, AlbertConfig, AutoTokenizer, AutoModelWithLMHead, pipeline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "('albert-large-bahasa-standard-cased/spiece.model',\n",
       " 'albert-large-bahasa-standard-cased/special_tokens_map.json',\n",
       " 'albert-large-bahasa-standard-cased/added_tokens.json')"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tokenizer = AlbertTokenizer('sp10m.cased.v10.model', do_lower_case = False)\n",
    "tokenizer.save_pretrained(out)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "import logging\n",
    "from transformers import AlbertConfig, AlbertForMaskedLM, load_tf_weights_in_albert\n",
    "\n",
    "\n",
    "logging.basicConfig(level=logging.INFO)\n",
    "\n",
    "\n",
    "def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, albert_config_file, pytorch_dump_path):\n",
    "    # Initialise PyTorch model\n",
    "    config = AlbertConfig.from_json_file(albert_config_file)\n",
    "    print(\"Building PyTorch model from configuration: {}\".format(str(config)))\n",
    "    model = AlbertForMaskedLM(config)\n",
    "\n",
    "    # Load weights from tf checkpoint\n",
    "    load_tf_weights_in_albert(model, config, tf_checkpoint_path)\n",
    "\n",
    "    # Save pytorch-model\n",
    "    print(\"Save PyTorch model to {}\".format(pytorch_dump_path))\n",
    "    torch.save(model.state_dict(), pytorch_dump_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Building PyTorch model from configuration: AlbertConfig {\n",
      "  \"attention_probs_dropout_prob\": 0,\n",
      "  \"bos_token_id\": 2,\n",
      "  \"classifier_dropout_prob\": 0.1,\n",
      "  \"down_scale_factor\": 1,\n",
      "  \"embedding_size\": 128,\n",
      "  \"eos_token_id\": 3,\n",
      "  \"gap_size\": 0,\n",
      "  \"hidden_act\": \"gelu\",\n",
      "  \"hidden_dropout_prob\": 0,\n",
      "  \"hidden_size\": 1024,\n",
      "  \"initializer_range\": 0.02,\n",
      "  \"inner_group_num\": 1,\n",
      "  \"intermediate_size\": 4096,\n",
      "  \"layer_norm_eps\": 1e-12,\n",
      "  \"max_position_embeddings\": 512,\n",
      "  \"model_type\": \"albert\",\n",
      "  \"net_structure_type\": 0,\n",
      "  \"num_attention_heads\": 16,\n",
      "  \"num_hidden_groups\": 1,\n",
      "  \"num_hidden_layers\": 24,\n",
      "  \"num_memory_blocks\": 0,\n",
      "  \"pad_token_id\": 0,\n",
      "  \"type_vocab_size\": 2,\n",
      "  \"vocab_size\": 32000\n",
      "}\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:transformers.modeling_albert:Converting TensorFlow checkpoint from /home/husein/albert/albert-large/model.ckpt\n",
      "INFO:transformers.modeling_albert:Loading TF weight bert/embeddings/LayerNorm/beta with shape [128]\n",
      "INFO:transformers.modeling_albert:Loading TF weight bert/embeddings/LayerNorm/gamma with shape [128]\n",
      "INFO:transformers.modeling_albert:Loading TF weight bert/embeddings/position_embeddings with shape [512, 128]\n",
      "INFO:transformers.modeling_albert:Loading TF weight bert/embeddings/token_type_embeddings with shape [2, 128]\n",
      "INFO:transformers.modeling_albert:Loading TF weight bert/embeddings/word_embeddings with shape [32000, 128]\n",
      "INFO:transformers.modeling_albert:Loading TF weight bert/encoder/embedding_hidden_mapping_in/bias with shape [1024]\n",
      "INFO:transformers.modeling_albert:Loading TF weight bert/encoder/embedding_hidden_mapping_in/kernel with shape [128, 1024]\n",
      "INFO:transformers.modeling_albert:Loading TF weight bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta with shape [1024]\n",
      "INFO:transformers.modeling_albert:Loading TF weight bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma with shape [1024]\n",
      "INFO:transformers.modeling_albert:Loading TF weight bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta with shape [1024]\n",
      "INFO:transformers.modeling_albert:Loading TF weight bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma with shape [1024]\n",
      "INFO:transformers.modeling_albert:Loading TF weight bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias with shape [1024]\n",
      "INFO:transformers.modeling_albert:Loading TF weight bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel with shape [1024, 1024]\n",
      "INFO:transformers.modeling_albert:Loading TF weight bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias with shape [1024]\n",
      "INFO:transformers.modeling_albert:Loading TF weight bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel with shape [1024, 1024]\n",
      "INFO:transformers.modeling_albert:Loading TF weight bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias with shape [1024]\n",
      "INFO:transformers.modeling_albert:Loading TF weight bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel with shape [1024, 1024]\n",
      "INFO:transformers.modeling_albert:Loading TF weight bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias with shape [1024]\n",
      "INFO:transformers.modeling_albert:Loading TF weight bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel with shape [1024, 1024]\n",
      "INFO:transformers.modeling_albert:Loading TF weight bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias with shape [4096]\n",
      "INFO:transformers.modeling_albert:Loading TF weight bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel with shape [1024, 4096]\n",
      "INFO:transformers.modeling_albert:Loading TF weight bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias with shape [1024]\n",
      "INFO:transformers.modeling_albert:Loading TF weight bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel with shape [4096, 1024]\n",
      "INFO:transformers.modeling_albert:Loading TF weight bert/pooler/dense/bias with shape [1024]\n",
      "INFO:transformers.modeling_albert:Loading TF weight bert/pooler/dense/kernel with shape [1024, 1024]\n",
      "INFO:transformers.modeling_albert:Loading TF weight cls/predictions/output_bias with shape [32000]\n",
      "INFO:transformers.modeling_albert:Loading TF weight cls/predictions/transform/LayerNorm/beta with shape [128]\n",
      "INFO:transformers.modeling_albert:Loading TF weight cls/predictions/transform/LayerNorm/gamma with shape [128]\n",
      "INFO:transformers.modeling_albert:Loading TF weight cls/predictions/transform/dense/bias with shape [128]\n",
      "INFO:transformers.modeling_albert:Loading TF weight cls/predictions/transform/dense/kernel with shape [1024, 128]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "bert/embeddings/LayerNorm/beta\n",
      "bert/embeddings/LayerNorm/gamma\n",
      "bert/embeddings/position_embeddings\n",
      "bert/embeddings/token_type_embeddings\n",
      "bert/embeddings/word_embeddings\n",
      "bert/encoder/embedding_hidden_mapping_in/bias\n",
      "bert/encoder/embedding_hidden_mapping_in/kernel\n",
      "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta\n",
      "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma\n",
      "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta\n",
      "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma\n",
      "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias\n",
      "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel\n",
      "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias\n",
      "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel\n",
      "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias\n",
      "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel\n",
      "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias\n",
      "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel\n",
      "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias\n",
      "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel\n",
      "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias\n",
      "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel\n",
      "bert/pooler/dense/bias\n",
      "bert/pooler/dense/kernel\n",
      "cls/predictions/output_bias\n",
      "cls/predictions/transform/LayerNorm/beta\n",
      "cls/predictions/transform/LayerNorm/gamma\n",
      "cls/predictions/transform/dense/bias\n",
      "cls/predictions/transform/dense/kernel\n",
      "Initialize PyTorch weight ['albert', 'embeddings', 'LayerNorm', 'beta'] from bert/embeddings/LayerNorm/beta\n",
      "Initialize PyTorch weight ['albert', 'embeddings', 'LayerNorm', 'gamma'] from bert/embeddings/LayerNorm/gamma\n",
      "Initialize PyTorch weight ['albert', 'embeddings', 'position_embeddings'] from bert/embeddings/position_embeddings\n",
      "Initialize PyTorch weight ['albert', 'embeddings', 'token_type_embeddings'] from bert/embeddings/token_type_embeddings\n",
      "Initialize PyTorch weight ['albert', 'embeddings', 'word_embeddings'] from bert/embeddings/word_embeddings\n",
      "Initialize PyTorch weight ['albert', 'encoder', 'embedding_hidden_mapping_in', 'bias'] from bert/encoder/embedding_hidden_mapping_in/bias\n",
      "Initialize PyTorch weight ['albert', 'encoder', 'embedding_hidden_mapping_in', 'kernel'] from bert/encoder/embedding_hidden_mapping_in/kernel\n",
      "Initialize PyTorch weight ['albert', 'encoder', 'albert_layer_groups', '0', 'albert_layers', '0', 'attention', 'LayerNorm', 'beta'] from bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta\n",
      "Initialize PyTorch weight ['albert', 'encoder', 'albert_layer_groups', '0', 'albert_layers', '0', 'attention', 'LayerNorm', 'gamma'] from bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma\n",
      "Initialize PyTorch weight ['albert', 'encoder', 'albert_layer_groups', '0', 'albert_layers', '0', 'full_layer_layer_norm', 'beta'] from bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta\n",
      "Initialize PyTorch weight ['albert', 'encoder', 'albert_layer_groups', '0', 'albert_layers', '0', 'full_layer_layer_norm', 'gamma'] from bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma\n",
      "Initialize PyTorch weight ['albert', 'encoder', 'albert_layer_groups', '0', 'albert_layers', '0', 'attention', 'dense', 'bias'] from bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias\n",
      "Initialize PyTorch weight ['albert', 'encoder', 'albert_layer_groups', '0', 'albert_layers', '0', 'attention', 'dense', 'kernel'] from bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel\n",
      "Initialize PyTorch weight ['albert', 'encoder', 'albert_layer_groups', '0', 'albert_layers', '0', 'attention', 'key', 'bias'] from bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias\n",
      "Initialize PyTorch weight ['albert', 'encoder', 'albert_layer_groups', '0', 'albert_layers', '0', 'attention', 'key', 'kernel'] from bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel\n",
      "Initialize PyTorch weight ['albert', 'encoder', 'albert_layer_groups', '0', 'albert_layers', '0', 'attention', 'query', 'bias'] from bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias\n",
      "Initialize PyTorch weight ['albert', 'encoder', 'albert_layer_groups', '0', 'albert_layers', '0', 'attention', 'query', 'kernel'] from bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel\n",
      "Initialize PyTorch weight ['albert', 'encoder', 'albert_layer_groups', '0', 'albert_layers', '0', 'attention', 'value', 'bias'] from bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias\n",
      "Initialize PyTorch weight ['albert', 'encoder', 'albert_layer_groups', '0', 'albert_layers', '0', 'attention', 'value', 'kernel'] from bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel\n",
      "Initialize PyTorch weight ['albert', 'encoder', 'albert_layer_groups', '0', 'albert_layers', '0', 'ffn', 'bias'] from bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias\n",
      "Initialize PyTorch weight ['albert', 'encoder', 'albert_layer_groups', '0', 'albert_layers', '0', 'ffn', 'kernel'] from bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel\n",
      "Initialize PyTorch weight ['albert', 'encoder', 'albert_layer_groups', '0', 'albert_layers', '0', 'ffn_output', 'bias'] from bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias\n",
      "Initialize PyTorch weight ['albert', 'encoder', 'albert_layer_groups', '0', 'albert_layers', '0', 'ffn_output', 'kernel'] from bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel\n",
      "Initialize PyTorch weight ['albert', 'pooler', 'bias'] from bert/pooler/dense/bias\n",
      "Initialize PyTorch weight ['albert', 'pooler', 'kernel'] from bert/pooler/dense/kernel\n",
      "Initialize PyTorch weight ['predictions', 'output_bias'] from cls/predictions/output_bias\n",
      "Initialize PyTorch weight ['predictions', 'LayerNorm', 'beta'] from cls/predictions/transform/LayerNorm/beta\n",
      "Initialize PyTorch weight ['predictions', 'LayerNorm', 'gamma'] from cls/predictions/transform/LayerNorm/gamma\n",
      "Initialize PyTorch weight ['predictions', 'dense', 'bias'] from cls/predictions/transform/dense/bias\n",
      "Initialize PyTorch weight ['predictions', 'dense', 'kernel'] from cls/predictions/transform/dense/kernel\n",
      "Save PyTorch model to albert-large-bahasa-standard-cased/pytorch_model.bin\n"
     ]
    }
   ],
   "source": [
    "convert_tf_checkpoint_to_pytorch('albert-large/model.ckpt', \n",
    "                                 'LARGE_config.json', \n",
    "                                 f'{out}/pytorch_model.bin')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "!rm -rf albert-large"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "out-large/\n",
      "out-large/sp10m.cased.v10.model\n",
      "out-large/sp10m.cased.v10.vocab\n",
      "out-large/model.ckpt-475000.index\n",
      "out-large/model.ckpt-475000.meta\n",
      "out-large/config.json\n",
      "out-large/model.ckpt-475000.data-00000-of-00001\n"
     ]
    }
   ],
   "source": [
    "!cp sp10m.cased.v10.* out-large\n",
    "!cp LARGE_config.json out-large/config.json\n",
    "!tar cvzf albert-large-475k-19-10-2020.tar.gz out-large"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:transformers.tokenization_utils_base:Model name './albert-large-bahasa-standard-cased' not found in model shortcut name list (albert-base-v1, albert-large-v1, albert-xlarge-v1, albert-xxlarge-v1, albert-base-v2, albert-large-v2, albert-xlarge-v2, albert-xxlarge-v2). Assuming './albert-large-bahasa-standard-cased' is a path, a model identifier, or url to a directory containing tokenizer files.\n",
      "INFO:transformers.tokenization_utils_base:Didn't find file ./albert-large-bahasa-standard-cased/added_tokens.json. We won't load it.\n",
      "INFO:transformers.tokenization_utils_base:Didn't find file ./albert-large-bahasa-standard-cased/tokenizer.json. We won't load it.\n",
      "INFO:transformers.tokenization_utils_base:loading file ./albert-large-bahasa-standard-cased/spiece.model\n",
      "INFO:transformers.tokenization_utils_base:loading file None\n",
      "INFO:transformers.tokenization_utils_base:loading file ./albert-large-bahasa-standard-cased/special_tokens_map.json\n",
      "INFO:transformers.tokenization_utils_base:loading file ./albert-large-bahasa-standard-cased/tokenizer_config.json\n",
      "INFO:transformers.tokenization_utils_base:loading file None\n"
     ]
    }
   ],
   "source": [
    "tokenizer = AlbertTokenizer.from_pretrained(f'./{out}', do_lower_case = False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "config = AlbertConfig('LARGE_config.json')\n",
    "config.vocab_size = 32000\n",
    "config.intermediate_size = 4096\n",
    "config.hidden_size = 1024\n",
    "config.num_attention_heads = 16\n",
    "config.num_hidden_groups = 1\n",
    "config.num_hidden_layers = 24"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/husein/.local/lib/python3.6/site-packages/transformers/modeling_auto.py:798: FutureWarning: The class `AutoModelWithLMHead` is deprecated and will be removed in a future version. Please use `AutoModelForCausalLM` for causal language models, `AutoModelForMaskedLM` for masked language models and `AutoModelForSeq2SeqLM` for encoder-decoder models.\n",
      "  FutureWarning,\n",
      "INFO:transformers.modeling_utils:loading weights file ./albert-large-bahasa-standard-cased/pytorch_model.bin\n",
      "INFO:transformers.modeling_utils:All model checkpoint weights were used when initializing AlbertForMaskedLM.\n",
      "\n",
      "INFO:transformers.modeling_utils:All the weights of AlbertForMaskedLM were initialized from the model checkpoint at ./albert-large-bahasa-standard-cased/pytorch_model.bin.\n",
      "If your task is similar to the task the model of the ckeckpoint was trained on, you can already use AlbertForMaskedLM for predictions without further training.\n"
     ]
    }
   ],
   "source": [
    "model = AutoModelWithLMHead.from_pretrained(f'./{out}/pytorch_model.bin', config = config)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "fill_mask = pipeline('fill-mask', model=model, tokenizer=tokenizer)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'sequence': '[CLS] tolonglah gov buat something, kami dah join[SEP]',\n",
       "  'score': 0.05382946506142616,\n",
       "  'token': 1801,\n",
       "  'token_str': '▁join'},\n",
       " {'sequence': '[CLS] tolonglah gov buat something, kami dah.[SEP]',\n",
       "  'score': 0.019622132182121277,\n",
       "  'token': 5,\n",
       "  'token_str': '.'},\n",
       " {'sequence': '[CLS] tolonglah gov buat something, kami dah tahu[SEP]',\n",
       "  'score': 0.017230793833732605,\n",
       "  'token': 178,\n",
       "  'token_str': '▁tahu'},\n",
       " {'sequence': '[CLS] tolonglah gov buat something, kami dah tau[SEP]',\n",
       "  'score': 0.016450127586722374,\n",
       "  'token': 157,\n",
       "  'token_str': '▁tau'},\n",
       " {'sequence': '[CLS] tolonglah gov buat something, kami dah,[SEP]',\n",
       "  'score': 0.013463296927511692,\n",
       "  'token': 19,\n",
       "  'token_str': ','}]"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "fill_mask('tolonglah gov buat something, kami dah [MASK]')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:transformers.configuration_utils:Configuration saved in albert-large-bahasa-standard-cased/config.json\n",
      "INFO:transformers.modeling_utils:Model weights saved in albert-large-bahasa-standard-cased/pytorch_model.bin\n"
     ]
    }
   ],
   "source": [
    "model.save_pretrained(out)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "# !transformers-cli upload ./albert-large-bahasa-standard-cased"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
